/**
 *  Copyright(C) 2012-2013 Suntec(Shanghai) Software Co., Ltd.
 *  All Right Reserved.
 */
package com.cugbmao.luence;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.Socket;
import java.net.UnknownHostException;

/**
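 * Minimal crawler example: requests the home page of www.bnu.edu.cn over a
 * plain socket, prints the returned HTML to the console and saves it to a
 * local file.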
 * 
 * @version 2013年5月21日
 * @author Suntec
 * @since JDK1.6
 * 
 */
public class WebCrawler {
	/** Host that is both connected to and named in the HTTP Host header. */
	private static final String HOST = "www.bnu.edu.cn";

	/** TCP port of the web server. */
	private static final int PORT = 80;

	/** File the downloaded markup is written to. */
	private static String text_file_path = "D:\\Lucene\\html\\htmlsrc.html";

	/**
	 * Downloads the root page of {@link #HOST}, prints the markup to standard
	 * output and stores it in {@link #text_file_path}.
	 * <p>
	 * On any failure a message is printed to standard error and the JVM exits
	 * with status 1.
	 * 
	 * @param args ignored
	 */
	public static void main( String[] args ) {
		try {
			String str = download( HOST, PORT );

			// Show the page markup on the console, then persist it. The file is
			// only opened after a successful download so that a failed run does
			// not truncate a previously saved copy.
			System.out.println( str );
			save( str, text_file_path );
		}
		catch ( UnknownHostException e ) {
			System.err.println( "无法访问指定主机。" );
			System.err.println( e );
			System.exit( 1 );
		}
		catch ( IOException e ) {
			System.err.println( "下载失败，请检查输入的地址是否正确。" );
			System.err.println( e );
			System.exit( 1 );
		}
	}

	/**
	 * Builds the HTTP/1.1 request for the root document of the given host.
	 * <p>
	 * Lines are terminated with an explicit CRLF instead of the platform line
	 * separator, so the request is the same on every operating system.
	 * Package-private so it can be checked without network access.
	 * 
	 * @param host value of the Host header
	 * @return the complete request text, including the terminating empty line
	 */
	static String buildRequest( String host ) {
		return "GET / HTTP/1.1\r\n"
				+ "Host: " + host + "\r\n"
				+ "Connection: Close\r\n"
				+ "\r\n";
	}

	/**
	 * Reads the stream to its end and returns everything from the first
	 * {@code '<'} onwards; whatever precedes it (status line, headers) is
	 * discarded.
	 * <p>
	 * Uses blocking reads, so it returns as soon as the stream ends, also when
	 * the stream is empty. Package-private so it can be checked without
	 * network access.
	 * 
	 * @param receiver source of the response text
	 * @return the text starting at the first {@code '<'}, or an empty string
	 *         if the stream contains none
	 * @throws IOException if reading fails
	 */
	static String readMarkup( BufferedReader receiver ) throws IOException {
		int ch = receiver.read();
		while ( ch != -1 && ch != '<' ) {
			ch = receiver.read();
		}

		StringBuilder markup = new StringBuilder( 8096 );
		while ( ch != -1 ) {
			markup.append( (char) ch );
			ch = receiver.read();
		}
		return markup.toString();
	}

	/**
	 * Opens a connection, sends the request and returns the received markup.
	 * The socket is closed on every path.
	 * 
	 * @param host server to connect to (also sent as Host header)
	 * @param port TCP port of the server
	 * @return the response text starting at the first {@code '<'}
	 * @throws IOException if connecting, sending or receiving fails
	 */
	private static String download( String host, int port ) throws IOException {
		Socket webclient = new Socket( host, port );
		try {
			// Send the HTTP request. PrintWriter never throws on write errors,
			// so checkError() (which also flushes) is used to detect them.
			PrintWriter request = new PrintWriter( webclient.getOutputStream() );
			request.print( buildRequest( host ) );
			if ( request.checkError() ) {
				throw new IOException( "Failed to send HTTP request to " + host );
			}

			// Receive the HTTP response.
			// NOTE(review): bytes are decoded with the platform default charset,
			// and a chunked transfer-encoding would leave chunk-size lines in
			// the result - neither is handled here.
			BufferedReader receiver = new BufferedReader( new InputStreamReader( webclient.getInputStream() ) );
			return readMarkup( receiver );
		}
		finally {
			webclient.close();
		}
	}

	/**
	 * Writes the text to the given file, replacing existing content. The
	 * writer is closed on every path.
	 * 
	 * @param content text to store
	 * @param path target file
	 * @throws IOException if the file cannot be opened or written
	 */
	private static void save( String content, String path ) throws IOException {
		FileWriter fwriter = new FileWriter( new File( path ) );
		try {
			fwriter.write( content );
		}
		finally {
			fwriter.close();
		}
	}
}
